<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">


<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    
    <!-- Page title: the module whose source is listed, then the project name and version. -->
    <title>pgmpy.estimators.ExhaustiveSearch &#8212; pgmpy 0.1.2 documentation</title>
    
    <!-- Stylesheets from the shared _static directory; pygments.css presumably
         styles the token classes used in the highlighted listing (verify). -->
    <link rel="stylesheet" href="../../../_static/sphinxdoc.css" type="text/css" />
    <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
    
    <!-- Global settings object, defined before the static scripts below are loaded.
         URL_ROOT is the relative path back to the documentation root and matches
         the "../../../" prefix used by the links on this page.
         NOTE(review): presumably consumed by doctools.js; confirm before renaming keys. -->
    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '../../../',
        VERSION:     '0.1.2',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  true,
        SOURCELINK_SUFFIX: '.txt'
      };
    </script>
    <!-- These scripts load synchronously and in this order. jquery.js must stay a
         blocking script: the sidebar contains an inline script that calls $()
         while the page is still being parsed. -->
    <script type="text/javascript" src="../../../_static/jquery.js"></script>
    <script type="text/javascript" src="../../../_static/underscore.js"></script>
    <script type="text/javascript" src="../../../_static/doctools.js"></script>
    <!-- Document relations pointing at the general index and the search page. -->
    <link rel="index" title="Index" href="../../../genindex.html" />
    <link rel="search" title="Search" href="../../../search.html" /> 
  </head>
  <body role="document">
    <!-- Top navigation bar: shortcuts to the general index and the module index,
         followed by the breadcrumb path leading to this page. -->
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a accesskey="I" href="../../../genindex.html" title="General Index">index</a></li>
        <li class="right">
          <a href="../../../py-modindex.html" title="Python Module Index">modules</a> |</li>
        <li class="nav-item nav-item-0"><a href="../../../index.html">pgmpy 0.1.2 documentation</a> &#187;</li>
        <li class="nav-item nav-item-1"><a accesskey="U" href="../../index.html">Module code</a> &#187;</li>
      </ul>
    </div>
      <!-- Sidebar: project logo linking to the documentation home page, plus the
           quick-search form. -->
      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
        <div class="sphinxsidebarwrapper">
          <!-- The image is the only content of the link, so its alt text names the
               link destination rather than describing the picture. -->
          <p class="logo"><a href="../../../index.html">
            <img class="logo" src="../../../_static/logo.png" alt="pgmpy 0.1.2 documentation"/>
          </a></p>
          <!-- The search box starts hidden and is revealed by the inline script
               below, so it only appears when JavaScript is available. -->
          <div id="searchbox" style="display: none" role="search">
            <!-- The heading doubles as the accessible name of the query field. -->
            <h3 id="searchlabel">Quick search</h3>
            <form class="search" action="../../../search.html" method="get">
              <div><input type="text" name="q" aria-labelledby="searchlabel" /></div>
              <div><input type="submit" value="Go" /></div>
              <input type="hidden" name="check_keywords" value="yes" />
              <input type="hidden" name="area" value="default" />
            </form>
          </div>
          <!-- Runs during parsing, so jquery.js must already be loaded at this point. -->
          <script type="text/javascript">$('#searchbox').show(0);</script>
        </div>
      </div>

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          <div class="body" role="main">
            
  <h1>Source code for pgmpy.estimators.ExhaustiveSearch</h1><div class="highlight"><pre>
<span></span><span class="ch">#!/usr/bin/env python</span>

<span class="kn">from</span> <span class="nn">warnings</span> <span class="k">import</span> <span class="n">warn</span>
<span class="kn">from</span> <span class="nn">itertools</span> <span class="k">import</span> <span class="n">combinations</span>

<span class="kn">import</span> <span class="nn">networkx</span> <span class="k">as</span> <span class="nn">nx</span>

<span class="kn">from</span> <span class="nn">pgmpy.estimators</span> <span class="k">import</span> <span class="n">StructureEstimator</span>
<span class="kn">from</span> <span class="nn">pgmpy.estimators</span> <span class="k">import</span> <span class="n">K2Score</span>
<span class="kn">from</span> <span class="nn">pgmpy.utils.mathext</span> <span class="k">import</span> <span class="n">powerset</span>
<span class="kn">from</span> <span class="nn">pgmpy.models</span> <span class="k">import</span> <span class="n">BayesianModel</span>


<div class="viewcode-block" id="ExhaustiveSearch"><a class="viewcode-back" href="../../../estimators.html#pgmpy.estimators.ExhaustiveSearch.ExhaustiveSearch">[docs]</a><span class="k">class</span> <span class="nc">ExhaustiveSearch</span><span class="p">(</span><span class="n">StructureEstimator</span><span class="p">):</span>
    <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">scoring_method</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
        <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Search class for exhaustive searches over all BayesianModels with a given set of variables.</span>
<span class="sd">        Takes a `StructureScore`-Instance as parameter; `estimate` finds the model with maximal score.</span>

<span class="sd">        Parameters</span>
<span class="sd">        ----------</span>
<span class="sd">        data: pandas DataFrame object</span>
<span class="sd">            dataframe object where each column represents one variable.</span>
<span class="sd">            (If some values in the data are missing the data cells should be set to `numpy.NaN`.</span>
<span class="sd">            Note that pandas converts each column containing `numpy.NaN`s to dtype `float`.)</span>

<span class="sd">        scoring_method: Instance of a `StructureScore`-subclass (`K2Score` is used as default)</span>
<span class="sd">            An instance of `K2Score`, `BdeuScore`, or `BicScore`.</span>
<span class="sd">            This score is optimized during structure estimation by the `estimate`-method.</span>

<span class="sd">        state_names: dict (optional)</span>
<span class="sd">            A dict indicating, for each variable, the discrete set of states (or values)</span>
<span class="sd">            that the variable can take. If unspecified, the observed values in the data set</span>
<span class="sd">            are taken to be the only possible states.</span>

<span class="sd">        complete_samples_only: bool (optional, default `True`)</span>
<span class="sd">            Specifies how to deal with missing data, if present. If set to `True` all rows</span>
<span class="sd">            that contain `np.NaN` somewhere are ignored. If `False` then, for each variable,</span>
<span class="sd">            every row where neither the variable nor its parents are `np.NaN` is used.</span>
<span class="sd">            This sets the behavior of the `state_count`-method.</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="k">if</span> <span class="n">scoring_method</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">scoring_method</span> <span class="o">=</span> <span class="n">scoring_method</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">scoring_method</span> <span class="o">=</span> <span class="n">K2Score</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>

        <span class="nb">super</span><span class="p">(</span><span class="n">ExhaustiveSearch</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>

<div class="viewcode-block" id="ExhaustiveSearch.all_dags"><a class="viewcode-back" href="../../../estimators.html#pgmpy.estimators.ExhaustiveSearch.ExhaustiveSearch.all_dags">[docs]</a>    <span class="k">def</span> <span class="nf">all_dags</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nodes</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
        <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Computes all possible directed acyclic graphs with a given set of nodes,</span>
<span class="sd">        sparse ones first. `2**(n*(n-1))` graphs need to be searched, given `n` nodes,</span>
<span class="sd">        so this is likely not feasible for n&gt;6. This is a generator.</span>

<span class="sd">        Parameters</span>
<span class="sd">        ----------</span>
<span class="sd">        nodes: list of nodes for the DAGs (optional)</span>
<span class="sd">            A list of the node names that the generated DAGs should have.</span>
<span class="sd">            If not provided, nodes are taken from data.</span>

<span class="sd">        Returns</span>
<span class="sd">        -------</span>
<span class="sd">        dags: Generator object for nx.DiGraphs</span>
<span class="sd">            Generator that yields all acyclic nx.DiGraphs, ordered by number of edges. Empty DAG first.</span>

<span class="sd">        Examples</span>
<span class="sd">        --------</span>
<span class="sd">        &gt;&gt;&gt; import pandas as pd</span>
<span class="sd">        &gt;&gt;&gt; from pgmpy.estimators import ExhaustiveSearch</span>
<span class="sd">        &gt;&gt;&gt; s = ExhaustiveSearch(pd.DataFrame(data={&#39;Temperature&#39;: [23, 19],</span>
<span class="sd">                                                    &#39;Weather&#39;: [&#39;sunny&#39;, &#39;cloudy&#39;],</span>
<span class="sd">                                                    &#39;Humidity&#39;: [65, 75]}))</span>
<span class="sd">        &gt;&gt;&gt; list(s.all_dags())</span>
<span class="sd">        [&lt;networkx.classes.digraph.DiGraph object at 0x7f6955216438&gt;,</span>
<span class="sd">         &lt;networkx.classes.digraph.DiGraph object at 0x7f6955216518&gt;,</span>
<span class="sd">        ....</span>
<span class="sd">        &gt;&gt;&gt; [dag.edges() for dag in s.all_dags()]</span>
<span class="sd">        [[], [(&#39;Humidity&#39;, &#39;Temperature&#39;)], [(&#39;Humidity&#39;, &#39;Weather&#39;)],</span>
<span class="sd">        [(&#39;Temperature&#39;, &#39;Weather&#39;)], [(&#39;Temperature&#39;, &#39;Humidity&#39;)],</span>
<span class="sd">        ....</span>
<span class="sd">        [(&#39;Weather&#39;, &#39;Humidity&#39;), (&#39;Weather&#39;, &#39;Temperature&#39;), (&#39;Temperature&#39;, &#39;Humidity&#39;)]]</span>

<span class="sd">        &quot;&quot;&quot;</span>
        <span class="k">if</span> <span class="n">nodes</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
            <span class="n">nodes</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">state_names</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
        <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">nodes</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">6</span><span class="p">:</span>
            <span class="n">warn</span><span class="p">(</span><span class="s2">&quot;Generating all DAGs of n nodes likely not feasible for n&gt;6!&quot;</span><span class="p">)</span>
            <span class="n">warn</span><span class="p">(</span><span class="s2">&quot;Attempting to search through </span><span class="si">{0}</span><span class="s2"> graphs&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="mi">2</span><span class="o">**</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">nodes</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">nodes</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">))))</span>

        <span class="n">edges</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">combinations</span><span class="p">(</span><span class="n">nodes</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span>  <span class="c1"># n*(n-1) possible directed edges</span>
        <span class="n">edges</span><span class="o">.</span><span class="n">extend</span><span class="p">([(</span><span class="n">y</span><span class="p">,</span> <span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span> <span class="ow">in</span> <span class="n">edges</span><span class="p">])</span>
        <span class="n">all_graphs</span> <span class="o">=</span> <span class="n">powerset</span><span class="p">(</span><span class="n">edges</span><span class="p">)</span>  <span class="c1"># 2^(n*(n-1)) graphs</span>

        <span class="k">for</span> <span class="n">graph_edges</span> <span class="ow">in</span> <span class="n">all_graphs</span><span class="p">:</span>
            <span class="n">graph</span> <span class="o">=</span> <span class="n">nx</span><span class="o">.</span><span class="n">DiGraph</span><span class="p">()</span>
            <span class="n">graph</span><span class="o">.</span><span class="n">add_nodes_from</span><span class="p">(</span><span class="n">nodes</span><span class="p">)</span>
            <span class="n">graph</span><span class="o">.</span><span class="n">add_edges_from</span><span class="p">(</span><span class="n">graph_edges</span><span class="p">)</span>
            <span class="k">if</span> <span class="n">nx</span><span class="o">.</span><span class="n">is_directed_acyclic_graph</span><span class="p">(</span><span class="n">graph</span><span class="p">):</span>
                <span class="k">yield</span> <span class="n">graph</span></div>

<div class="viewcode-block" id="ExhaustiveSearch.all_scores"><a class="viewcode-back" href="../../../estimators.html#pgmpy.estimators.ExhaustiveSearch.ExhaustiveSearch.all_scores">[docs]</a>    <span class="k">def</span> <span class="nf">all_scores</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Computes a list of DAGs and their structure scores, ordered by score.</span>

<span class="sd">        Returns</span>
<span class="sd">        -------</span>
<span class="sd">        list: a list of (score, dag) pairs</span>
<span class="sd">            A list of (score, dag)-tuples, where score is a float and dag is an acyclic nx.DiGraph.</span>
<span class="sd">            The list is ordered by score values.</span>

<span class="sd">        Examples</span>
<span class="sd">        --------</span>
<span class="sd">        &gt;&gt;&gt; import pandas as pd</span>
<span class="sd">        &gt;&gt;&gt; import numpy as np</span>
<span class="sd">        &gt;&gt;&gt; from pgmpy.estimators import ExhaustiveSearch, K2Score</span>
<span class="sd">        &gt;&gt;&gt; # create random data sample with 3 variables, where B and C are identical:</span>
<span class="sd">        &gt;&gt;&gt; data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list(&#39;AB&#39;))</span>
<span class="sd">        &gt;&gt;&gt; data[&#39;C&#39;] = data[&#39;B&#39;]</span>
<span class="sd">        &gt;&gt;&gt; searcher = ExhaustiveSearch(data, scoring_method=K2Score(data))</span>
<span class="sd">        &gt;&gt;&gt; for score, model in searcher.all_scores():</span>
<span class="sd">        ...   print(&quot;{0}\t{1}&quot;.format(score, model.edges()))</span>
<span class="sd">        -24234.44977974726      [(&#39;A&#39;, &#39;B&#39;), (&#39;A&#39;, &#39;C&#39;)]</span>
<span class="sd">        -24234.449760691063     [(&#39;A&#39;, &#39;B&#39;), (&#39;C&#39;, &#39;A&#39;)]</span>
<span class="sd">        -24234.449760691063     [(&#39;A&#39;, &#39;C&#39;), (&#39;B&#39;, &#39;A&#39;)]</span>
<span class="sd">        -24203.700955937973     [(&#39;A&#39;, &#39;B&#39;)]</span>
<span class="sd">        -24203.700955937973     [(&#39;A&#39;, &#39;C&#39;)]</span>
<span class="sd">        -24203.700936881774     [(&#39;B&#39;, &#39;A&#39;)]</span>
<span class="sd">        -24203.700936881774     [(&#39;C&#39;, &#39;A&#39;)]</span>
<span class="sd">        -24203.700936881774     [(&#39;B&#39;, &#39;A&#39;), (&#39;C&#39;, &#39;A&#39;)]</span>
<span class="sd">        -24172.952132128685     []</span>
<span class="sd">        -16597.30920265254      [(&#39;A&#39;, &#39;B&#39;), (&#39;A&#39;, &#39;C&#39;), (&#39;B&#39;, &#39;C&#39;)]</span>
<span class="sd">        -16597.30920265254      [(&#39;A&#39;, &#39;B&#39;), (&#39;A&#39;, &#39;C&#39;), (&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        -16597.309183596342     [(&#39;A&#39;, &#39;B&#39;), (&#39;C&#39;, &#39;A&#39;), (&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        -16597.309183596342     [(&#39;A&#39;, &#39;C&#39;), (&#39;B&#39;, &#39;A&#39;), (&#39;B&#39;, &#39;C&#39;)]</span>
<span class="sd">        -16566.560378843253     [(&#39;A&#39;, &#39;B&#39;), (&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        -16566.560378843253     [(&#39;A&#39;, &#39;C&#39;), (&#39;B&#39;, &#39;C&#39;)]</span>
<span class="sd">        -16268.324549347722     [(&#39;A&#39;, &#39;B&#39;), (&#39;B&#39;, &#39;C&#39;)]</span>
<span class="sd">        -16268.324549347722     [(&#39;A&#39;, &#39;C&#39;), (&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        -16268.324530291524     [(&#39;B&#39;, &#39;A&#39;), (&#39;B&#39;, &#39;C&#39;)]</span>
<span class="sd">        -16268.324530291524     [(&#39;B&#39;, &#39;C&#39;), (&#39;C&#39;, &#39;A&#39;)]</span>
<span class="sd">        -16268.324530291524     [(&#39;B&#39;, &#39;A&#39;), (&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        -16268.324530291524     [(&#39;C&#39;, &#39;A&#39;), (&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        -16268.324530291524     [(&#39;B&#39;, &#39;A&#39;), (&#39;B&#39;, &#39;C&#39;), (&#39;C&#39;, &#39;A&#39;)]</span>
<span class="sd">        -16268.324530291524     [(&#39;B&#39;, &#39;A&#39;), (&#39;C&#39;, &#39;A&#39;), (&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        -16237.575725538434     [(&#39;B&#39;, &#39;C&#39;)]</span>
<span class="sd">        -16237.575725538434     [(&#39;C&#39;, &#39;B&#39;)]</span>
<span class="sd">        &quot;&quot;&quot;</span>

        <span class="n">scored_dags</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">([(</span><span class="bp">self</span><span class="o">.</span><span class="n">scoring_method</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">dag</span><span class="p">),</span> <span class="n">dag</span><span class="p">)</span> <span class="k">for</span> <span class="n">dag</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">all_dags</span><span class="p">()],</span>
                             <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
        <span class="k">return</span> <span class="n">scored_dags</span></div>

<div class="viewcode-block" id="ExhaustiveSearch.estimate"><a class="viewcode-back" href="../../../estimators.html#pgmpy.estimators.ExhaustiveSearch.ExhaustiveSearch.estimate">[docs]</a>    <span class="k">def</span> <span class="nf">estimate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Estimates the `BayesianModel` structure that fits best to the given data set,</span>
<span class="sd">        according to the scoring method supplied in the constructor.</span>
<span class="sd">        Exhaustively searches through all models. Only estimates network structure, no parametrization.</span>

<span class="sd">        Returns</span>
<span class="sd">        -------</span>
<span class="sd">        model: `BayesianModel` instance</span>
<span class="sd">            A `BayesianModel` with maximal score.</span>

<span class="sd">        Examples</span>
<span class="sd">        --------</span>
<span class="sd">        &gt;&gt;&gt; import pandas as pd</span>
<span class="sd">        &gt;&gt;&gt; import numpy as np</span>
<span class="sd">        &gt;&gt;&gt; from pgmpy.estimators import ExhaustiveSearch</span>
<span class="sd">        &gt;&gt;&gt; # create random data sample with 3 variables, where B and C are identical:</span>
<span class="sd">        &gt;&gt;&gt; data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list(&#39;AB&#39;))</span>
<span class="sd">        &gt;&gt;&gt; data[&#39;C&#39;] = data[&#39;B&#39;]</span>
<span class="sd">        &gt;&gt;&gt; est = ExhaustiveSearch(data)</span>
<span class="sd">        &gt;&gt;&gt; best_model = est.estimate()</span>
<span class="sd">        &gt;&gt;&gt; best_model</span>
<span class="sd">        &lt;pgmpy.models.BayesianModel.BayesianModel object at 0x7f695c535470&gt;</span>
<span class="sd">        &gt;&gt;&gt; best_model.edges()</span>
<span class="sd">        [(&#39;B&#39;, &#39;C&#39;)]</span>
<span class="sd">        &quot;&quot;&quot;</span>

        <span class="n">best_dag</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">all_dags</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">scoring_method</span><span class="o">.</span><span class="n">score</span><span class="p">)</span>

        <span class="n">best_model</span> <span class="o">=</span> <span class="n">BayesianModel</span><span class="p">()</span>
        <span class="n">best_model</span><span class="o">.</span><span class="n">add_nodes_from</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">best_dag</span><span class="o">.</span><span class="n">nodes</span><span class="p">()))</span>
        <span class="n">best_model</span><span class="o">.</span><span class="n">add_edges_from</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">best_dag</span><span class="o">.</span><span class="n">edges</span><span class="p">()))</span>
        <span class="k">return</span> <span class="n">best_model</span></div></div>
</pre></div>

          </div>
        </div>
      </div>
      <div class="clearer"></div>
    </div>
    <!-- Bottom navigation bar: the same index/module shortcuts and breadcrumb
         path, without keyboard access keys. -->
    <div class="related" role="navigation" aria-label="related navigation">
      <h3>Navigation</h3>
      <ul>
        <li class="right" style="margin-right: 10px">
          <a href="../../../genindex.html" title="General Index">index</a></li>
        <li class="right">
          <a href="../../../py-modindex.html" title="Python Module Index">modules</a> |</li>
        <li class="nav-item nav-item-0"><a href="../../../index.html">pgmpy 0.1.2 documentation</a> &#187;</li>
        <li class="nav-item nav-item-1"><a href="../../index.html">Module code</a> &#187;</li>
      </ul>
    </div>
    <!-- Page footer: copyright notice and generator credit. The Sphinx link uses
         HTTPS so visitors are not sent through an unencrypted redirect. -->
    <div class="footer" role="contentinfo">
      &#169; Copyright 2016, Ankur Ankan.
      Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 1.5.1.
    </div>
  </body>
</html>